EG started this on 20160403 how does the noise effect the loglikelihood Weibull model->theta is a scale, gamma is a shape parameter >0 Gompertz Model-> beta(G) is a scale, alpha(R) is a shape parameter>0
difference function of loglikelihood function of gompertz and weibull p.d.fs test if L(Weibull,X)>L(Gompertz,X) for parameters Weibull model->theta is a scale, gamma is a shape parameter >0 Gompertz Model-> beta is a scale, alpha is a shape parameter>0
For additive Gaussian noise e ~ N (0, sigma^2) with known variance sigma^2 sd of gaussian noise function max sd would be = 3*mean(inverse.gomp.CDF) min sd would be mean(inverse.gomp.CDF)
require(flexsurv)
## Loading required package: flexsurv
## Loading required package: survival
require(gplots)
## Loading required package: gplots
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
#theta=0.025
#gamma=0.001
#test G and R in nested for loops
beta= 0.034
#G G= (0.1,0.25)
alpha=0.01
#R R= (0.001,0.1)
# population size
N=100;
## Introduce random Gompertz function
#rgompertz(alpha,beta,N) gives random Gompertz numbers from inverse CDF of Gompertz
#where alpha and beta are 2 parameters, N is number of population
#generate gompertz random numbers by using inverse CDF
#generate random number with a given distribution of Gompertz
#prediction
rgompertz = function(alpha,beta, N){
x.uniform = runif(N)
#inverse of Gompertz CDF
inverse.gomp.CDF = function(alpha,beta,y) { (1/beta)*log(1 - (beta/alpha)*log(1-y) ) }
x.gompertz = inverse.gomp.CDF(alpha,beta, x.uniform)
return(x.gompertz)
}
##Introduce random Weibull function
rweibull= function(theta,gamma,N)
{
x.uniform= runif(n)
inverse.wei.CDF=function(theta,gamma,y) { theta*(-log(1-y))^(1/gamma)}
x.weibull=inverse.wei.CDF(theta,gamma,x.uniform)
return(x.weibull)
}
#create a function that calculates noise of lifespan
calculate.noise = function(i){
#lifespan
gaussian<-rnorm(N, mean = 0, sd=i)
#observation
#X<- gompertz.random +gaussian to be used in simulation later.
#noise
noise=sd(gaussian)
return(noise)
}
#generate gompertz random numbers (lifespan)
#prediction
gompertz.random<-rgompertz(alpha,beta,N)
average.lifespan=mean(gompertz.random)
#initiliaze arrays for the variable values from loops
sderr<- list()
Delta_LL<-list()
G<-list()
R<-list()
LWei<-list()
LGomp<-list()
MeanLF<-list()
sdLS<-list()
Delta_LL.flex<-list()
LWei.flex<-list()
LGomp.flex<-list()
G.flex.estimated<-list()
R.flex.estimated<-list()
LLG.par<-list()
LLR.par<-list()
v.flex.estimated<-list()
lambda.flex.estimated<-list()
## simulate for parameters beta,alpha and noise=i to search effect of noise on delta likl
## with change in parameters
for (beta in c(0.05,0.08, 0.1,0.15,0.17, 0.2, 0.25)){
for (alpha in c(1E-3, 0.002, 0.005,0.008, 0.01,0.03, 0.05)){ #fix alpha or in other words R shape parameter
#for (sd in seq(round.lifespan,3*round.lifespan,by=1)){
for (i in c(0, 0.5, 1, 2,3,4, 5)){
#for (i in c(0)){
#generate gompertz random numbers (lifespan)
#prediction
gompertz.random<-rgompertz(alpha,beta,N)
average.lifespan=mean(gompertz.random)
#store average.lifespan into MeanLF list
MeanLF[[length(MeanLF)+1]]=average.lifespan
#check the sd by using calculate.noise() function
sd.gaussian=calculate.noise(i)
#generate gaussion random numbers
gaussian<-rnorm(N, mean = 2*average.lifespan, sd=i)
#standard deviation of gompertz.random
sd.lifespan=sd(gompertz.random)
#store sd of lifespan into SdLS list
sdLS[[length(sdLS)+1]] =sd.lifespan
#add gaussian random numbers to gompertz random numbers
lifespan<- gompertz.random +gaussian
#calculate the mortality rate
m = alpha * exp( beta * lifespan )
log_m = log(alpha) + beta * lifespan;
beta.lifespan=beta*lifespan
#pdf(paste("plots/","alpha=",alpha,".","beta=",beta, ".batch.pdf", sep=''), width=5, height=5)
#plot(log_m ,beta.lifespan)
#dev.off()
#Log likelihood function for the Weibull model
weib.likl<-function(param,y){
theta<-exp(param[1]) #take exponential to avoid NaNs when taking log(theta)
gamma<-exp(param[2]) # avoid NaNs when taking log(gamma)
delta=1; # delta is 1 for right censored data which is our case; lifespan>0
y=lifespan[!is.na(lifespan)]
logl<-sum(delta*(log(gamma) + gamma*log(theta) + (gamma-1)*log(y) -
(theta*y)^gamma )) -sum((1-delta)*(theta*y)^gamma)
return(-logl)
}
# take log(param) since you take exponential above to avoid NaN values above
weib=optim(log(c(0.03,0.01)),weib.likl,y=lifespan)
weib$value
LWei[[length(LWei)+1]] = weib$value
#beta=0.05; alpha=0.02
# Log likelihood function of gompertz distiribution
gomp.likl <- function (param,y){
beta<-param[1]
alpha<-param[2]
delta=1
y=lifespan[!is.na(lifespan)]
logl<-sum(delta*(log(beta)+alpha*y+(-(beta/alpha)*(exp(alpha*y)-1)))) +
sum((1-delta)*(-(beta/alpha)*(exp(alpha*y)-1)))
return(-logl)
}
gomp<-optim(c(0.03,0.01),gomp.likl,y=lifespan)
gomp$value
#store loglikelihood values of gompertz optimized results into LGomp list
LGomp[[length(LGomp)+1]] = gomp$value
# store R and G estimation from optim of likl functions in Gompertz
LLG.par[[length(LLG.par)+1]] =gomp$par[1]
LLR.par[[length(LLR.par)+1]]=gomp$par[2]
delta.likelihood.wei<- weib$value-gomp$value
#calculate LL and noise change
sderr[[length(sderr)+1]] = i
Delta_LL[[length(Delta_LL)+1]] = delta.likelihood.wei
G[[length(G)+1]]=beta
#switch to alpha.seq when for fixed beta
R[[length(R)+1]]=alpha
#todo use flexsurv to calculate the LL
#flexsurv only works with positive variables.
#fix gaussian std to 0
gaussian.flex= rnorm(N, mean = 2*average.lifespan, sd=0)
X.flex= gompertz.random +gaussian.flex
fitGomp = flexsurvreg(formula = Surv(X.flex) ~ 1, dist="gompertz")
fitWei = flexsurvreg(formula = Surv(X.flex) ~ 1, dist="weibull")
LWei.flex[[length(LWei.flex)+1]]=fitWei$loglik
LGomp.flex[[length(LGomp.flex)+1]]=fitGomp$loglik
param.Gomp<-fitGomp$res; R.flex<-param.Gomp[1]; G.flex<-param.Gomp[2];
R.flex.estimated[[length(R.flex.estimated)+1]]<-R.flex
G.flex.estimated[[length(G.flex.estimated)+1]]<-G.flex
param.Wei<-fitWei$res; v.flex<-param.Wei[1]; lambda.flex<-param.Wei[2];
v.flex.estimated[[length(v.flex.estimated)+1]]<-v.flex;
lambda.flex.estimated[[length(lambda.flex.estimated)+1]]<-lambda.flex
delta_flexsurv=fitWei$loglik-fitGomp$loglik
#fitWei$loglik
Delta_LL.flex[[length(Delta_LL.flex)+1]]=delta_flexsurv
}
}
}
m = alpha * exp( beta * lifespan )
log_m = log(alpha) + beta * lifespan;
#pdf(paste("plots/","alpha=",alpha,".","beta=",beta, ".batch.pdf", sep=''), width=5, height=5)
plot(log_m ,lifespan)
#dev.off()
#make a data frame of list variables
results = data.frame(cbind(sderr), cbind(R),cbind(LLR.par),cbind(R.flex.estimated),cbind(G),cbind(LLG.par),cbind(G.flex.estimated),cbind(Delta_LL) , cbind(Delta_LL.flex),
cbind(LWei),cbind(LWei.flex), cbind(LGomp),cbind(LGomp.flex), cbind(MeanLF), cbind(sdLS))
# create a matrix from results data frame to store in Results.csv
results_mat<-as.matrix(results)
write.csv(results_mat,file="Results.csv")
#write.csv(results_mat,file="noise_zero.csv")
#unlist variables
dLL<-unlist(results$Delta_LL )
dLL.flex<-unlist(results$Delta_LL.flex)
LLGomp<-unlist(results$LGomp)
LLGomp.flex<- unlist(results$LGomp.flex)
LLWei<- unlist(results$LWei)
LLWei.flex<- unlist(results$LWei.flex)
simulated.G<-unlist(results$G)
estimated.G.flex<-unlist(results$G.flex.estimated)
simulated.R<-unlist(results$R)
estimated.R.flex<-unlist(results$R.flex.estimated)
estimatedLL.G<-unlist(results$LLG.par)
estimatedLL.R<-unlist(results$LLR.par)
summary( lm( dLL~ dLL.flex))
##
## Call:
## lm(formula = dLL ~ dLL.flex)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.1206 -0.5270 0.4211 0.8295 6.3646
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.96322 0.11519 -8.362 1.6e-15 ***
## dLL.flex -0.87082 0.02897 -30.064 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.511 on 341 degrees of freedom
## Multiple R-squared: 0.7261, Adjusted R-squared: 0.7253
## F-statistic: 903.8 on 1 and 341 DF, p-value: < 2.2e-16
summary( lm( LLGomp ~ LLGomp.flex))
##
## Call:
## lm(formula = LLGomp ~ LLGomp.flex)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.617 -9.160 -2.081 4.709 48.782
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 50.95039 4.67913 10.89 <2e-16 ***
## LLGomp.flex -0.87853 0.01357 -64.74 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.29 on 341 degrees of freedom
## Multiple R-squared: 0.9248, Adjusted R-squared: 0.9245
## F-statistic: 4191 on 1 and 341 DF, p-value: < 2.2e-16
summary( lm( LLWei ~ LLWei.flex))
##
## Call:
## lm(formula = LLWei ~ LLWei.flex)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.321 -8.393 -1.413 4.739 53.022
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 47.87647 4.39432 10.89 <2e-16 ***
## LLWei.flex -0.88483 0.01285 -68.87 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.64 on 341 degrees of freedom
## Multiple R-squared: 0.9329, Adjusted R-squared: 0.9327
## F-statistic: 4743 on 1 and 341 DF, p-value: < 2.2e-16
summary(lm(LLWei~LLGomp))
##
## Call:
## lm(formula = LLWei ~ LLGomp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.6808 -1.7530 0.6038 2.3202 4.9265
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.746237 1.233717 -3.037 0.00258 **
## LLGomp 1.000964 0.003489 286.928 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.887 on 341 degrees of freedom
## Multiple R-squared: 0.9959, Adjusted R-squared: 0.9959
## F-statistic: 8.233e+04 on 1 and 341 DF, p-value: < 2.2e-16
summary(lm(LLWei.flex~LLGomp.flex))
##
## Call:
## lm(formula = LLWei.flex ~ LLGomp.flex)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.9219 -2.1739 -0.8229 1.7590 8.9761
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.337985 1.075108 2.175 0.0303 *
## LLGomp.flex 0.998624 0.003118 320.279 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.824 on 341 degrees of freedom
## Multiple R-squared: 0.9967, Adjusted R-squared: 0.9967
## F-statistic: 1.026e+05 on 1 and 341 DF, p-value: < 2.2e-16
summary(lm(simulated.G~estimated.G.flex))
##
## Call:
## lm(formula = simulated.G ~ estimated.G.flex)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.09842 -0.05809 0.00196 0.05159 0.13519
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.148425 0.003988 37.215 < 2e-16 ***
## estimated.G.flex -27.929553 9.603754 -2.908 0.00387 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0648 on 341 degrees of freedom
## Multiple R-squared: 0.0242, Adjusted R-squared: 0.02134
## F-statistic: 8.458 on 1 and 341 DF, p-value: 0.003874
summary(lm(simulated.R~estimated.R.flex))
##
## Call:
## lm(formula = simulated.R ~ estimated.R.flex)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.024433 -0.011639 -0.005756 0.010375 0.039543
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.003336 0.002149 1.553 0.121
## estimated.R.flex 0.069033 0.011492 6.007 4.85e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01608 on 341 degrees of freedom
## Multiple R-squared: 0.09569, Adjusted R-squared: 0.09304
## F-statistic: 36.08 on 1 and 341 DF, p-value: 4.846e-09
summary(lm(simulated.R~estimatedLL.R))
##
## Call:
## lm(formula = simulated.R ~ estimatedLL.R)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.024678 -0.011004 -0.006313 0.010732 0.039144
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.004236 0.002252 1.881 0.0608 .
## estimatedLL.R 0.072217 0.013731 5.260 2.55e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01626 on 341 degrees of freedom
## Multiple R-squared: 0.07504, Adjusted R-squared: 0.07232
## F-statistic: 27.66 on 1 and 341 DF, p-value: 2.555e-07
summary(lm(simulated.G~estimatedLL.G))
##
## Call:
## lm(formula = simulated.G ~ estimatedLL.G)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.100884 -0.060921 0.008902 0.057436 0.109104
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.140896 0.003905 36.081 <2e-16 ***
## estimatedLL.G 5.078914 4.299420 1.181 0.238
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06546 on 341 degrees of freedom
## Multiple R-squared: 0.004076, Adjusted R-squared: 0.001155
## F-statistic: 1.395 on 1 and 341 DF, p-value: 0.2383
results.sub<-data.frame(cbind(sderr),cbind(R),cbind(G),cbind(Delta_LL))
R.els = unlist( unique(results.sub$R))
colnum = length(R.els)
tmp = unlist( unique(results.sub$sderr))
noise.els = tmp[order(tmp)]
rownum = length(noise.els)
mat = matrix( data=NA, nrow= rownum, ncol=colnum) #noise as row, alpha as columns
rownames(mat) = noise.els
colnames(mat) = R.els
for (k in c(0.05,0.08, 0.1,0.15,0.17, 0.2, 0.25)){
data = results.sub[results.sub[,3]==k, 4]
data<-unlist(data)
heat_mat<-matrix(data,ncol=colnum,nrow=rownum)
#rownames(heat_mat, do.NULL = TRUE, prefix = "row")
rownames(heat_mat) <- c("0","0.5","1","2","3","4","5")
colnames(heat_mat) <- R.els
library(gplots)
hM <- format(round(heat_mat, 2))
data_mat<-scale(heat_mat,scale=TRUE,center=FALSE)
#paste(file = "~/github/model.comparison/plots/heatplot_zero_noise_G",k,".jpeg",sep="")
#jpeg(paste("plots/",k, ".fixed.G.jpg", sep=''))
#paste(“myplot_”, i, “.jpeg”, sep=””)
heatmap.2(data_mat, cellnote=hM,col = cm.colors(256), scale="none", notecol="black", margins=c(5,10),
dendrogram='none', Rowv=FALSE, Colv=FALSE,trace='none',
xlab = "R parameters",
ylab = "noise", main = bquote(paste("R vs. sd dLL at" ~ G==.(k))),par(cex.main=.5),srtCol=315, adjCol = c(0,1),cexRow=0.8,cexCol=0.8)
#dev.off()
}
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
G.els = unlist( unique(results.sub$G))
colnum = length(G.els)
R.els=unlist(unique(results.sub$R))
rownum = length(R.els)
mat = matrix( data=NA, nrow= rownum, ncol=colnum) #noise as row, alpha as columns
rownames(mat) = R.els
colnames(mat) = G.els
for (n in c(0, 0.5, 1,2,3,4,5) ){
data = results.sub[results.sub[,1]==n, 4]
data<-unlist(data)
heat_mat<-matrix(data,ncol=colnum,nrow=rownum)
#rownames(heat_mat, do.NULL = TRUE, prefix = "row")
rownames(heat_mat) <- R.els
colnames(heat_mat) <- G.els
library(gplots)
hM <- format(round(heat_mat, 2))
data_mat<-scale(heat_mat,scale=TRUE,center=FALSE)
#paste(file = "~/github/model.comparison/plots/heatplot_zero_noise_G",k,".jpeg",sep="")
#jpeg(paste("plots/",n, ".fixed_noise.jpg", sep=''))
#paste(“myplot_”, i, “.jpeg”, sep=””)
heatmap.2(data_mat, cellnote=hM,col = cm.colors(256), scale="none", notecol="black", margins=c(5,10),
dendrogram='none', Rowv=FALSE, Colv=FALSE,trace='none',
xlab = "R parameters",
ylab = "G parameters", main = bquote(paste("R vs G of dLL at" ~ sd==.(n))),par(cex.main=.5),srtCol=315, adjCol = c(0,1),cexRow=0.8,cexCol=0.8)
#dev.off()
}
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
G.els = unlist( unique(results.sub$G))
colnum = length(G.els)
tmp = unlist( unique(results.sub$sderr))
noise.els = tmp[order(tmp)]
rownum = length(noise.els)
mat = matrix( data=NA, nrow= rownum, ncol=colnum) #noise as row, alpha as columns
rownames(mat) = noise.els
colnames(mat) = G.els
for (j in c(1E-3, 0.002, 0.005,0.008, 0.01,0.03, 0.05) ){
data = results.sub[results.sub[,2]==j, 4]
data<-unlist(data)
heat_mat<-matrix(data,ncol=colnum,nrow=rownum)
#rownames(heat_mat, do.NULL = TRUE, prefix = "row")
rownames(heat_mat) <- c("0","0.5","1","2","3","4","5")
colnames(heat_mat) <- G.els
library(gplots)
hM <- format(round(heat_mat, 2))
data_mat<-scale(heat_mat,scale=TRUE,center=FALSE)
#paste(file = "~/github/model.comparison/plots/heatplot_zero_noise_G",k,".jpeg",sep="")
#jpeg(paste("plots/",j, ".fixed_R.jpg", sep=''))
#paste(“myplot_”, i, “.jpeg”, sep=””)
heatmap.2(data_mat, cellnote=hM,col = cm.colors(256), scale="none", notecol="black", margins=c(5,10),
dendrogram='none', Rowv=FALSE, Colv=FALSE,trace='none',
xlab = "G parameters",
ylab = "noise", main = bquote(paste("G vs. sd of dLL at" ~ R==.(j))),par(cex.main=.5),srtCol=315, adjCol = c(0,1),cexRow=0.8,cexCol=0.8)
#dev.off()
}
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL